# packages
if (!require("librarian")){
install.packages("librarian")
library(librarian)
}
librarian::shelf(
dplyr, DT, dygraphs, glue, gstat, here, lubridate, mapview, purrr, readr,
raster, rmapshaper, sf, skimr, stars, stringr, tidyr)
select <- dplyr::select
# paths
dir_data <- switch(
Sys.info()["nodename"],
`ben-mbpro` = "/Users/bbest/My Drive (ben@ecoquants.com)/projects/calcofi/data",
`Cristinas-MacBook-Pro.local` = "/Volumes/GoogleDrive/.shortcut-targets-by-id/13pWB5x59WSBR0mr9jJjkx7rri9hlUsMv/calcofi/data")
# TODO: get Erin's Google Drive path and "nodename")
bottle_csv <- file.path(dir_data, "/oceanographic-data/bottle-database/CalCOFI_Database_194903-202001_csv_22Sep2021/194903-202001_Bottle.csv")
cast_csv <- file.path(dir_data, "/oceanographic-data/bottle-database/CalCOFI_Database_194903-202001_csv_22Sep2021/194903-202001_Cast.csv")
calcofi_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull.geojson")
calcofi_offshore_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull_offshore.geojson")
calcofi_nearshore_geo <- here("data/calcofi_oceano-bottle-stations_convex-hull_nearshore.geojson")
# check paths
stopifnot(dir.exists(dir_data))
stopifnot(any(file.exists(bottle_csv,cast_csv)))
# read data
d_bottle <- read_csv(bottle_csv, skip=1, col_names = F)
names(d_bottle) <- str_split(
readLines(bottle_csv, n=1), ",")[[1]] %>%
str_replace("\xb5", "µ")
d_cast <- read_csv(cast_csv)
# join data
d <- d_cast %>%
left_join(
d_bottle %>% select(-Sta_ID),
by = "Cst_Cnt") %>%
mutate(Date = lubridate::as_date(Date, format = "%m/%d/%Y"))
# d_cast
skim(d_cast)
# d_bottle
skim(d_bottle)
# get example AOI (Channel Islands NMS)
sanctuaries_geo <- "https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson"
cinms_ply <- sf::st_read(sanctuaries_geo) %>%
dplyr::filter(nms == "CINMS")
## Reading layer `sanctuaries' from data source
## `https://github.com/noaa-onms/onmsR/raw/12a87dfd4b90f2e3009ccb4913315fb2df7afddc/data-raw/sanctuaries.geojson'
## using driver `GeoJSON'
## Simple feature collection with 16 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -15.38615 xmax: 180 ymax: 48.50589
## Geodetic CRS: WGS 84
# get AOI geom points as WKT for later use with API
cinms_txt <- sf::st_as_text(cinms_ply$geometry)
#cinms_txt
mapview(cinms_ply)
# for summary, want to group by Sta_Code because each data point has a diff Sta_ID
pts <- d %>%
filter(
!is.na(Lat_Dec),
!is.na(Lon_Dec)) %>%
group_by(
Sta_ID) %>%
summarize(
lon = mean(Lon_Dec),
lat = mean(Lat_Dec)) %>%
st_as_sf(
coords = c("lon", "lat"), crs=4326, remove = F) %>%
separate(
Sta_ID, c("Sta_ID_Line", "Sta_ID_Station"),
sep=" ", remove=F) %>%
mutate(
Sta_ID_Line = as.double(Sta_ID_Line),
Sta_ID_Station = as.double(Sta_ID_Station),
offshore = ifelse(Sta_ID_Station > 60, T, F))
mapview(pts, zcol="offshore")
table(pts$offshore)
##
## FALSE TRUE
## 1534 1100
Per guidance from Erin:
CalCOFI Regions– Station # > 60 = oceanic/offshore; <60 = neritic/nearshore/continental shelf (other regions include those from Venrick et al. 2012 or Stephens et al. 2018
make_hull <- function(x){
st_union(x) %>%
st_convex_hull() %>%
st_buffer(0.1) %>% # ~10 km
ms_simplify(keep = 0.05) }
hull_geos <- c(calcofi_geo, calcofi_nearshore_geo, calcofi_offshore_geo)
if (any(!file.exists(hull_geos))){
hull <- pts %>%
make_hull()
st_write(hull, calcofi_geo)
mapview(hull)
hull_nearshore <- pts %>%
filter(!offshore) %>%
make_hull()
st_write(hull_nearshore, calcofi_nearshore_geo)
mapview(hull_nearshore)
hull_offshore <- pts %>%
filter(offshore) %>%
make_hull()
st_write(hull_offshore, calcofi_offshore_geo)
mapview(hull_offshore)
}
aoi = cinms_ply
# find stations in aoi
pts_aoi <- pts %>%
mutate(
x = st_intersects(pts, aoi) %>% as.logical()) %>%
filter(x)
mapview(aoi) +
mapview(pts_aoi)
get_oceano_var_daily_aoi <- function(var, aoi, depth_min=0, depth_max=10){
# find stations in aoi
pts_aoi <- pts %>%
mutate(
x = st_intersects(pts, aoi) %>% as.logical()) %>%
filter(x)
d_aoi <- d %>%
filter(Sta_ID %in% pts_aoi$Sta_ID)
d_aoi_daily <- d_aoi %>%
filter(
!is.na(.data[[var]]),
Depthm >= depth_min,
Depthm < depth_max) %>%
group_by(Date) %>%
summarize(
var_n = n(),
var_min = min(.data[[var]], na.rm = T),
var_q10 = quantile(.data[[var]], probs = 0.10, na.rm = T),
var_avg = mean(.data[[var]], na.rm = T),
var_q90 = quantile(.data[[var]], 0.90, na.rm = T),
var_max = max(.data[[var]], na.rm = T),
var_sd = sd(.data[[var]], na.rm = T))
}
v <- get_oceano_var_daily_aoi("T_degC", cinms_ply, 0, 10)
datatable(v)
# TODO: make this a function for any variable,
# with a schema to apply for labels and colors
# see: [plot_metric_timeseries()](https://github.com/noaa-onms/onmsR/blob/2e438a9bdff8ee90b8fc811aafae4520c26049ab/R/spatial.R#L16-L49)
x <- v %>% select(
Date,
`10% quantile` = var_q10,
`average` = var_avg,
`90% quantile` = var_q90)
y <- xts::xts(x = x %>% select(-Date), order.by = x %>% pull(Date))
dygraph(
y,
main = "Sea Surface Temperature",
xlab = "Date", ylab = "Temperature (°C)") %>% # ...) %>%
dySeries(
c("10% quantile", "average", "90% quantile"),
label = "Temperature (°C)", color = "Red") %>%
dyRangeSelector(fillColor = " #FFFFFF", strokeColor = "#FFFFFF")
var = "T_degC"
aoi = cinms_ply
depth_min = 0
depth_max = 10
# find stations in aoi
pts_aoi <- pts %>%
mutate(
x = st_intersects(pts, aoi) %>% as.logical()) %>%
filter(x)
d_aoi <- d %>%
filter(Sta_ID %in% pts_aoi$Sta_ID)
d_aoi_daily <- d_aoi %>%
filter(
!is.na(.data[[var]]),
Depthm >= depth_min,
Depthm < depth_max) %>%
group_by(Date, Sta_ID) %>%
summarize(
var_n = n(),
var_avg = mean(.data[[var]], na.rm = T),
.groups = "drop") %>%
arrange(desc(Date))
d_aoi_daily
## # A tibble: 376 × 4
## Date Sta_ID var_n var_avg
## <date> <chr> <int> <dbl>
## 1 2020-01-15 083.3 051.0 2 14.0
## 2 2019-11-14 083.3 051.0 4 16.7
## 3 2019-07-22 083.3 051.0 5 14.5
## 4 2019-04-09 083.3 051.0 2 12.9
## 5 2019-02-11 083.3 051.0 2 14.1
## 6 2018-10-25 083.3 051.0 2 17.2
## 7 2018-06-20 083.3 051.0 2 15.6
## 8 2018-04-17 083.3 051.0 2 12.5
## 9 2018-02-06 083.3 051.0 3 15.2
## 10 2017-11-17 083.3 051.0 2 16.2
## # … with 366 more rows
Wierd: only ~one station per day.
get_oceano_var_cruise_raster <- function(cruise_id, var, depth_min, depth_max){
d_daily <- d %>%
filter(
!is.na(.data[[var]]),
Depthm >= !!depth_min,
Depthm < !!depth_max,
Cruise_ID == !!cruise_id) %>%
group_by(Cruise_ID, Date, Sta_ID) %>%
summarize(
var_n = n(),
var_avg = mean(.data[[var]], na.rm = T),
.groups = "drop") %>%
arrange(desc(Date), Cruise_ID)
d_daily
p <- pts %>%
left_join(
d_daily,
by="Sta_ID") %>%
select(Sta_ID, Date, var_avg) %>%
filter(!is.na(var_avg))
mapview(p, zcol="var_avg")
h <- st_convex_hull(st_union(p)) %>% st_as_sf() %>%
mutate(one = 1)
mapview(h)
r <- raster(as_Spatial(h), res=0.1, crs=4326)
z <- rasterize(as_Spatial(h), r, "one")
# inverse distance weighted interpolation
# https://rspatial.org/raster/analysis/4-interpolation.html
gs <- gstat(formula=var_avg~1, locations=p)
idw <- interpolate(z, gs)
w <- mask(idw, z)
w_tif <- here("data/_test_idw.tif")
raster::writeRaster(w, w_tif, overwrite=T)
w <- raster(w_tif) %>% readAll()
unlink(w_tif)
w
}
w <- get_oceano_var_cruise_raster(
cruise_id = "2020-01-05-C-33RL",
var = "T_degC",
depth_min = 0,
depth_max = 10)
## [inverse distance weighted interpolation]
mapview(w)
summarize:
OCEAN TEMP: T_degC
salinity: Salnty
OXYGEN (HYPOXIA)
ZOOPLANKTON
ICHTHYOPLANKTON for each station ID
Create read_bottle() in calcofi4r